Chapter 8

knitr::opts_chunk$set(cache = TRUE,autodep = TRUE)
library(rethinking)
## Loading required package: rstan
## Loading required package: ggplot2
## Loading required package: StanHeaders
## rstan (Version 2.10.1, packaged: 2016-06-24 13:22:16 UTC, GitRev: 85f7a56811da)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## rstan_options(auto_write = TRUE)
## options(mc.cores = parallel::detectCores())
## Loading required package: parallel
## rethinking (Version 1.59)
library(ggplot2)
library(reshape2)

8E1

The simple Metropolis algorhithim requires that the proposal distribution be symmetric

8E2

Gibbs sampling achieves its efficiency by using conjugate priors. Maybe you don’t want to use conjugate priors. Also becomes very ineffecient with large numbers of parameters.

8E3

HMC requires cannot handle discrete parameters. This is because it is sampling the probability space by gliding through it, and it is unclear how to glide between discrete points.

8E4

Because there is autocorrelation in MCMC chains samples are not independent and the effective number of samples is reduced. Neff estimates this.

8E5

Rhat should approach 1 (from above)

8E6

Should stabilize

8M1

Re-estimate the terrain ruggedness model from the chapter but now using a uniform prior and an exponential prior for sigma. Any difference in postieror distribition?

library(rethinking)
data(rugged)
d <- rugged
d$log_gdp <- log(d$rgdppc_2000)
dd <- d[ complete.cases(d$rgdppc_2000) , ]

dd.trim <- dd[ , c("log_gdp","rugged","cont_africa") ]

m8m1A.stan <- map2stan(
  alist(
    log_gdp ~ dnorm( mu , sigma ) ,
    mu <- a + bR*rugged + bA*cont_africa + bAR*rugged*cont_africa ,
    a ~ dnorm(0,100),
    bR ~ dnorm(0,10),
    bA ~ dnorm(0,10),
    bAR ~ dnorm(0,10),
    sigma ~ dcauchy(0,2)
  ), data=dd.trim )
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.155532 seconds (Warm-up)
##                0.160547 seconds (Sampling)
##                0.316079 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 2e-06 seconds (Warm-up)
##                6.2e-05 seconds (Sampling)
##                6.4e-05 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
m8m1_dunif.stan <- map2stan(
  alist(
    log_gdp ~ dnorm( mu , sigma ) ,
    mu <- a + bR*rugged + bA*cont_africa + bAR*rugged*cont_africa ,
    a ~ dnorm(0,100),
    bR ~ dnorm(0,10),
    bA ~ dnorm(0,10),
    bAR ~ dnorm(0,10),
    sigma ~ dunif(0,10)
  ), data=dd.trim )
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.165318 seconds (Warm-up)
##                0.158564 seconds (Sampling)
##                0.323882 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 4e-06 seconds (Warm-up)
##                6.8e-05 seconds (Sampling)
##                7.2e-05 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
m8m1_exp.stan <- map2stan(
  alist(
    log_gdp ~ dnorm( mu , sigma ) ,
    mu <- a + bR*rugged + bA*cont_africa + bAR*rugged*cont_africa ,
    a ~ dnorm(0,100),
    bR ~ dnorm(0,10),
    bA ~ dnorm(0,10),
    bAR ~ dnorm(0,10),
    sigma ~ dexp(1)
  ), data=dd.trim )
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.18534 seconds (Warm-up)
##                0.166415 seconds (Sampling)
##                0.351755 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 3e-06 seconds (Warm-up)
##                6.1e-05 seconds (Sampling)
##                6.4e-05 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
precis(m8m1A.stan)
##        Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a      9.22   0.14       9.02       9.44   368    1
## bR    -0.20   0.07      -0.32      -0.09   402    1
## bA    -1.94   0.22      -2.25      -1.56   405    1
## bAR    0.39   0.13       0.18       0.60   429    1
## sigma  0.95   0.05       0.88       1.03   665    1
precis(m8m1_dunif.stan)
##        Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a      9.22   0.13       9.03       9.44   392    1
## bR    -0.20   0.07      -0.32      -0.09   310    1
## bA    -1.94   0.22      -2.32      -1.62   439    1
## bAR    0.39   0.12       0.21       0.60   437    1
## sigma  0.95   0.05       0.88       1.04   705    1
precis(m8m1_exp.stan)
##        Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a      9.23   0.14       9.01       9.44   319    1
## bR    -0.20   0.08      -0.33      -0.09   295    1
## bA    -1.95   0.22      -2.34      -1.63   375    1
## bAR    0.40   0.13       0.20       0.62   364    1
## sigma  0.95   0.05       0.87       1.03   615    1
sigmas <- data.frame(
  cauchy=extract.samples(m8m1A.stan)$sigma,
  unif=extract.samples(m8m1_dunif.stan)$sigma,
  exp=extract.samples(m8m1_exp.stan)$sigma)

sigmas.m <- melt(sigmas,variable.name="prior.function",value.name="sigma")
## No id variables; using all as measure variables
# summary(sigmas.m)
dim(sigmas.m$sigma) <- 3000 #strange that I need to do this!!
summary(sigmas.m)
##  prior.function     sigma       
##  cauchy:1000    Min.   :0.8082  
##  unif  :1000    1st Qu.:0.9138  
##  exp   :1000    Median :0.9471  
##                 Mean   :0.9497  
##                 3rd Qu.:0.9826  
##                 Max.   :1.1270
pl <- ggplot(sigmas.m, aes(sigma,color=prior.function,fill=prior.function))
pl + geom_density(alpha=.2)

pl + geom_density() + facet_grid(prior.function ~ .)

Look very similar. I assume the differences are due to sampling.

8M2 Use smaller values of scaling parameter for sigma prior to see how that changes things…

First the cauchy model

What happends to cauchy distribution at different scales?

x <- seq(-10,10,.1)
scale <- c(2,1,.5,.2)
cauchy.dist <- as.data.frame(cbind(x,sapply(scale,function(s) dcauchy(x,location=0,scale = s))))
colnames(cauchy.dist)[2:5] <- as.character(scale)
head(cauchy.dist)
##       x           2           1         0.5          0.2
## 1 -10.0 0.006121344 0.003151583 0.001587580 0.0006363652
## 2  -9.9 0.006240758 0.003214927 0.001619733 0.0006492807
## 3  -9.8 0.006363652 0.003280193 0.001652871 0.0006625934
## 4  -9.7 0.006490160 0.003347459 0.001687036 0.0006763197
## 5  -9.6 0.006620422 0.003416809 0.001722270 0.0006904770
## 6  -9.5 0.006754586 0.003488328 0.001758618 0.0007050834
cauchy.dist.m <- melt(cauchy.dist,id.vars="x",variable.name="scale.value",value.name="y")
head(cauchy.dist.m)
##       x scale.value           y
## 1 -10.0           2 0.006121344
## 2  -9.9           2 0.006240758
## 3  -9.8           2 0.006363652
## 4  -9.7           2 0.006490160
## 5  -9.6           2 0.006620422
## 6  -9.5           2 0.006754586
pl <- ggplot(cauchy.dist.m,aes(x=x,y=y,fill=scale.value,color=scale.value))
pl <- pl + geom_density(stat="identity",alpha=0.1)
pl + ggtitle("cauchy distribution at different scales")

What happends to exp distribution at different rates?

x <- seq(-10,10,.1)
rate <- c(2,1,.5,.2)
exp.dist <- as.data.frame(cbind(x,sapply(rate,function(r) dexp(x,rate = r))))
colnames(exp.dist)[2:5] <- as.character(rate)
head(exp.dist)
##       x 2 1 0.5 0.2
## 1 -10.0 0 0   0   0
## 2  -9.9 0 0   0   0
## 3  -9.8 0 0   0   0
## 4  -9.7 0 0   0   0
## 5  -9.6 0 0   0   0
## 6  -9.5 0 0   0   0
exp.dist.m <- melt(exp.dist,id.vars="x",variable.name="rate.value",value.name="y")
head(exp.dist.m)
##       x rate.value y
## 1 -10.0          2 0
## 2  -9.9          2 0
## 3  -9.8          2 0
## 4  -9.7          2 0
## 5  -9.6          2 0
## 6  -9.5          2 0
pl <- ggplot(exp.dist.m,aes(x=x,y=y,fill=rate.value,color=rate.value))
pl <- pl + geom_density(stat="identity",alpha=0.1)
pl + ggtitle("exponential distribution at different rates")

Fit a series of cauchy models with smaller scales

m8M2.stan.cauchy2 <- map2stan(
  alist(
    log_gdp ~ dnorm( mu , sigma ) ,
    mu <- a + bR*rugged + bA*cont_africa + bAR*rugged*cont_africa ,
    a ~ dnorm(0,100),
    bR ~ dnorm(0,10),
    bA ~ dnorm(0,10),
    bAR ~ dnorm(0,10),
    sigma ~ dcauchy(0,2)
  ), data=dd.trim )
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.168448 seconds (Warm-up)
##                0.165375 seconds (Sampling)
##                0.333823 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 3e-06 seconds (Warm-up)
##                5.9e-05 seconds (Sampling)
##                6.2e-05 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
m8M2.stan.cauchy1 <- map2stan(
  alist(
    log_gdp ~ dnorm( mu , sigma ) ,
    mu <- a + bR*rugged + bA*cont_africa + bAR*rugged*cont_africa ,
    a ~ dnorm(0,100),
    bR ~ dnorm(0,10),
    bA ~ dnorm(0,10),
    bAR ~ dnorm(0,10),
    sigma ~ dcauchy(0,1)
  ), data=dd.trim )
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.177341 seconds (Warm-up)
##                0.181478 seconds (Sampling)
##                0.358819 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 2e-06 seconds (Warm-up)
##                5.8e-05 seconds (Sampling)
##                6e-05 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
m8M2.stan.cauchy.5 <- map2stan(
  alist(
    log_gdp ~ dnorm( mu , sigma ) ,
    mu <- a + bR*rugged + bA*cont_africa + bAR*rugged*cont_africa ,
    a ~ dnorm(0,100),
    bR ~ dnorm(0,10),
    bA ~ dnorm(0,10),
    bAR ~ dnorm(0,10),
    sigma ~ dcauchy(0,.5)
  ), data=dd.trim )
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.166094 seconds (Warm-up)
##                0.173018 seconds (Sampling)
##                0.339112 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 4e-06 seconds (Warm-up)
##                9.5e-05 seconds (Sampling)
##                9.9e-05 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
m8M2.stan.cauchy.2 <- map2stan(
  alist(
    log_gdp ~ dnorm( mu , sigma ) ,
    mu <- a + bR*rugged + bA*cont_africa + bAR*rugged*cont_africa ,
    a ~ dnorm(0,100),
    bR ~ dnorm(0,10),
    bA ~ dnorm(0,10),
    bAR ~ dnorm(0,10),
    sigma ~ dcauchy(0,.2)
  ), data=dd.trim )
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.162987 seconds (Warm-up)
##                0.164945 seconds (Sampling)
##                0.327932 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 3e-06 seconds (Warm-up)
##                5.8e-05 seconds (Sampling)
##                6.1e-05 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]

look at posteriors

precis(m8M2.stan.cauchy2)
##        Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a      9.24   0.14       9.03       9.46   370    1
## bR    -0.21   0.08      -0.35      -0.10   338    1
## bA    -1.97   0.22      -2.31      -1.61   396    1
## bAR    0.41   0.13       0.19       0.61   376    1
## sigma  0.95   0.05       0.88       1.03   720    1
precis(m8M2.stan.cauchy1)
##        Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a      9.22   0.14       8.99       9.46   188    1
## bR    -0.20   0.08      -0.32      -0.08   317    1
## bA    -1.94   0.23      -2.31      -1.58   200    1
## bAR    0.39   0.14       0.19       0.63   213    1
## sigma  0.95   0.05       0.87       1.03   625    1
precis(m8M2.stan.cauchy.5)
##        Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a      9.21   0.14       8.98       9.41   375    1
## bR    -0.20   0.08      -0.32      -0.08   393    1
## bA    -1.93   0.22      -2.26      -1.58   370    1
## bAR    0.38   0.13       0.18       0.59   405    1
## sigma  0.95   0.05       0.86       1.03   639    1
precis(m8M2.stan.cauchy.2)
##        Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a      9.21   0.13       9.01       9.44   287 1.01
## bR    -0.20   0.08      -0.31      -0.07   297 1.01
## bA    -1.94   0.22      -2.26      -1.55   348 1.00
## bAR    0.38   0.13       0.18       0.59   353 1.00
## sigma  0.94   0.05       0.87       1.02   705 1.00

plot posteriors

fits <- ls(pattern="m8M2.stan.cauch")
sigmas <- sapply(fits, function(x) extract.samples(get(x))$sigma)
colnames(sigmas) <- fits
sigmas.m <- melt(sigmas)
pl <- ggplot(sigmas.m,aes(x=value,color=Var2,fill=Var2))
pl <- pl + geom_density(alpha=0.1)
pl

exp models with smaller scales:

m8M2.stan.exp1 <- map2stan(
  alist(
    log_gdp ~ dnorm( mu , sigma ) ,
    mu <- a + bR*rugged + bA*cont_africa + bAR*rugged*cont_africa ,
    a ~ dnorm(0,100),
    bR ~ dnorm(0,10),
    bA ~ dnorm(0,10),
    bAR ~ dnorm(0,10),
    sigma ~ dexp(1)
  ), data=dd.trim )
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.248328 seconds (Warm-up)
##                0.165953 seconds (Sampling)
##                0.414281 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 3e-06 seconds (Warm-up)
##                5.9e-05 seconds (Sampling)
##                6.2e-05 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
m8M2.stan.exp.5 <- map2stan(
  alist(
    log_gdp ~ dnorm( mu , sigma ) ,
    mu <- a + bR*rugged + bA*cont_africa + bAR*rugged*cont_africa ,
    a ~ dnorm(0,100),
    bR ~ dnorm(0,10),
    bA ~ dnorm(0,10),
    bAR ~ dnorm(0,10),
    sigma ~ dexp(.5)
  ), data=dd.trim )
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.172952 seconds (Warm-up)
##                0.1892 seconds (Sampling)
##                0.362152 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 5e-06 seconds (Warm-up)
##                0.000101 seconds (Sampling)
##                0.000106 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
m8M2.stan.exp.2 <- map2stan(
  alist(
    log_gdp ~ dnorm( mu , sigma ) ,
    mu <- a + bR*rugged + bA*cont_africa + bAR*rugged*cont_africa ,
    a ~ dnorm(0,100),
    bR ~ dnorm(0,10),
    bA ~ dnorm(0,10),
    bAR ~ dnorm(0,10),
    sigma ~ dexp(.2)
  ), data=dd.trim )
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.164668 seconds (Warm-up)
##                0.163552 seconds (Sampling)
##                0.32822 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 2e-06 seconds (Warm-up)
##                6.1e-05 seconds (Sampling)
##                6.3e-05 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
m8M2.stan.exp.1 <- map2stan(
  alist(
    log_gdp ~ dnorm( mu , sigma ) ,
    mu <- a + bR*rugged + bA*cont_africa + bAR*rugged*cont_africa ,
    a ~ dnorm(0,100),
    bR ~ dnorm(0,10),
    bA ~ dnorm(0,10),
    bAR ~ dnorm(0,10),
    sigma ~ dexp(.1)
  ), data=dd.trim )
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.190367 seconds (Warm-up)
##                0.167493 seconds (Sampling)
##                0.35786 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 4e-06 seconds (Warm-up)
##                0.000111 seconds (Sampling)
##                0.000115 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
m8M2.stan.exp.001 <- map2stan(
  alist(
    log_gdp ~ dnorm( mu , sigma ) ,
    mu <- a + bR*rugged + bA*cont_africa + bAR*rugged*cont_africa ,
    a ~ dnorm(0,100),
    bR ~ dnorm(0,10),
    bA ~ dnorm(0,10),
    bAR ~ dnorm(0,10),
    sigma ~ dexp(.001)
  ), data=dd.trim )
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.160757 seconds (Warm-up)
##                0.177289 seconds (Sampling)
##                0.338046 seconds (Total)
## The following numerical problems occured the indicated number of times after warmup on chain 1
##                                                                                 count
## Exception thrown at line 24: normal_log: Scale parameter is 0, but must be > 0!     2
## When a numerical problem occurs, the Metropolis proposal gets rejected.
## However, by design Metropolis proposals sometimes get rejected even when there are no numerical problems.
## Thus, if the number in the 'count' column is small, do not ask about this message on stan-users.
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 4e-06 seconds (Warm-up)
##                5.8e-05 seconds (Sampling)
##                6.2e-05 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]

look at posteriors

precis(m8M2.stan.exp1)
##        Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a      9.22   0.14       9.01       9.45   313    1
## bR    -0.20   0.07      -0.31      -0.08   294    1
## bA    -1.94   0.22      -2.29      -1.60   358    1
## bAR    0.40   0.13       0.17       0.59   313    1
## sigma  0.95   0.05       0.88       1.03   811    1
precis(m8M2.stan.exp.5)
##        Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a      9.23   0.14       8.98       9.44   317    1
## bR    -0.21   0.08      -0.33      -0.08   280    1
## bA    -1.96   0.23      -2.28      -1.55   283    1
## bAR    0.40   0.13       0.19       0.61   272    1
## sigma  0.95   0.05       0.86       1.02   708    1
precis(m8M2.stan.exp.2)
##        Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a      9.22   0.13       9.00       9.43   374 1.00
## bR    -0.20   0.07      -0.30      -0.06   386 1.00
## bA    -1.94   0.23      -2.29      -1.56   310 1.00
## bAR    0.39   0.13       0.19       0.59   271 1.00
## sigma  0.95   0.05       0.87       1.03   598 1.01
precis(m8M2.stan.exp.1)
##        Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a      9.22   0.14       8.97       9.43   383 1.01
## bR    -0.20   0.08      -0.32      -0.08   381 1.01
## bA    -1.94   0.23      -2.33      -1.62   350 1.01
## bAR    0.39   0.13       0.19       0.58   318 1.00
## sigma  0.95   0.05       0.87       1.03   871 1.00
precis(m8M2.stan.exp.001) #rhat going up?
##        Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a      9.21   0.14       8.97       9.42   245    1
## bR    -0.20   0.08      -0.32      -0.07   275    1
## bA    -1.94   0.23      -2.28      -1.58   334    1
## bAR    0.39   0.13       0.18       0.60   301    1
## sigma  0.95   0.05       0.88       1.03   574    1

plot posteriors

fits <- ls(pattern="m8M2.stan.exp")
sigmas <- sapply(fits, function(x) extract.samples(get(x))$sigma)
colnames(sigmas) <- fits
sigmas.m <- melt(sigmas)
pl <- ggplot(sigmas.m,aes(x=value,color=Var2,fill=Var2))
pl <- pl + geom_density(alpha=0.1)
pl

8M3

Restimate a stan model at different number of warm up samples.

m8m3.stan.warm1000 <- map2stan(
  alist(
    log_gdp ~ dnorm( mu , sigma ) ,
    mu <- a + bR*rugged + bA*cont_africa + bAR*rugged*cont_africa ,
    a ~ dnorm(0,100),
    bR ~ dnorm(0,10),
    bA ~ dnorm(0,10),
    bAR ~ dnorm(0,10),
    sigma ~ dcauchy(0,2)
  ), data=dd.trim )
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.186122 seconds (Warm-up)
##                0.163903 seconds (Sampling)
##                0.350025 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 3e-06 seconds (Warm-up)
##                6e-05 seconds (Sampling)
##                6.3e-05 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
m8m3.stan.warm100 <- map2stan(
  alist(
    log_gdp ~ dnorm( mu , sigma ) ,
    mu <- a + bR*rugged + bA*cont_africa + bAR*rugged*cont_africa ,
    a ~ dnorm(0,100),
    bR ~ dnorm(0,10),
    bA ~ dnorm(0,10),
    bAR ~ dnorm(0,10),
    sigma ~ dcauchy(0,2)
  ), data=dd.trim, warmup = 100, iter = 1100 )
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: The initial buffer, adaptation window, and terminal buffer
##          overflow the total number of warmup iterations.
##          Defaulting to a 15%/75%/10% partition,
##            init_buffer = 15
##            adapt_window = 75
##            term_buffer = 10
## 
## 
## Chain 1, Iteration:    1 / 1100 [  0%]  (Warmup)
## Chain 1, Iteration:  101 / 1100 [  9%]  (Sampling)
## Chain 1, Iteration:  210 / 1100 [ 19%]  (Sampling)
## Chain 1, Iteration:  320 / 1100 [ 29%]  (Sampling)
## Chain 1, Iteration:  430 / 1100 [ 39%]  (Sampling)
## Chain 1, Iteration:  540 / 1100 [ 49%]  (Sampling)
## Chain 1, Iteration:  650 / 1100 [ 59%]  (Sampling)
## Chain 1, Iteration:  760 / 1100 [ 69%]  (Sampling)
## Chain 1, Iteration:  870 / 1100 [ 79%]  (Sampling)
## Chain 1, Iteration:  980 / 1100 [ 89%]  (Sampling)
## Chain 1, Iteration: 1090 / 1100 [ 99%]  (Sampling)
## Chain 1, Iteration: 1100 / 1100 [100%]  (Sampling)
##  Elapsed Time: 0.025018 seconds (Warm-up)
##                0.164472 seconds (Sampling)
##                0.18949 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 4e-06 seconds (Warm-up)
##                0.000117 seconds (Sampling)
##                0.000121 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
m8m3.stan.warm50 <- map2stan(
  alist(
    log_gdp ~ dnorm( mu , sigma ) ,
    mu <- a + bR*rugged + bA*cont_africa + bAR*rugged*cont_africa ,
    a ~ dnorm(0,100),
    bR ~ dnorm(0,10),
    bA ~ dnorm(0,10),
    bAR ~ dnorm(0,10),
    sigma ~ dcauchy(0,2)
  ), data=dd.trim, warmup = 50, iter = 1050 )
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: The initial buffer, adaptation window, and terminal buffer
##          overflow the total number of warmup iterations.
##          Defaulting to a 15%/75%/10% partition,
##            init_buffer = 7
##            adapt_window = 38
##            term_buffer = 5
## 
## 
## Chain 1, Iteration:    1 / 1050 [  0%]  (Warmup)
## Chain 1, Iteration:   51 / 1050 [  4%]  (Sampling)
## Chain 1, Iteration:  155 / 1050 [ 14%]  (Sampling)
## Chain 1, Iteration:  260 / 1050 [ 24%]  (Sampling)
## Chain 1, Iteration:  365 / 1050 [ 34%]  (Sampling)
## Chain 1, Iteration:  470 / 1050 [ 44%]  (Sampling)
## Chain 1, Iteration:  575 / 1050 [ 54%]  (Sampling)
## Chain 1, Iteration:  680 / 1050 [ 64%]  (Sampling)
## Chain 1, Iteration:  785 / 1050 [ 74%]  (Sampling)
## Chain 1, Iteration:  890 / 1050 [ 84%]  (Sampling)
## Chain 1, Iteration:  995 / 1050 [ 94%]  (Sampling)
## Chain 1, Iteration: 1050 / 1050 [100%]  (Sampling)
##  Elapsed Time: 0.025709 seconds (Warm-up)
##                0.157805 seconds (Sampling)
##                0.183514 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 4e-06 seconds (Warm-up)
##                6e-05 seconds (Sampling)
##                6.4e-05 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
m8m3.stan.warm10 <- map2stan(
  alist(
    log_gdp ~ dnorm( mu , sigma ) ,
    mu <- a + bR*rugged + bA*cont_africa + bAR*rugged*cont_africa ,
    a ~ dnorm(0,100),
    bR ~ dnorm(0,10),
    bA ~ dnorm(0,10),
    bAR ~ dnorm(0,10),
    sigma ~ dcauchy(0,2)
  ), data=dd.trim, warmup = 10, iter = 1010 )
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration:    1 / 1010 [  0%]  (Warmup)
## Chain 1, Iteration:   11 / 1010 [  1%]  (Sampling)
## Chain 1, Iteration:  111 / 1010 [ 10%]  (Sampling)
## Chain 1, Iteration:  212 / 1010 [ 20%]  (Sampling)
## Chain 1, Iteration:  313 / 1010 [ 30%]  (Sampling)
## Chain 1, Iteration:  414 / 1010 [ 40%]  (Sampling)
## Chain 1, Iteration:  515 / 1010 [ 50%]  (Sampling)
## Chain 1, Iteration:  616 / 1010 [ 60%]  (Sampling)
## Chain 1, Iteration:  717 / 1010 [ 70%]  (Sampling)
## Chain 1, Iteration:  818 / 1010 [ 80%]  (Sampling)
## Chain 1, Iteration:  919 / 1010 [ 90%]  (Sampling)
## Chain 1, Iteration: 1010 / 1010 [100%]  (Sampling)
##  Elapsed Time: 0.011599 seconds (Warm-up)
##                0.080073 seconds (Sampling)
##                0.091672 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 3e-06 seconds (Warm-up)
##                5.9e-05 seconds (Sampling)
##                6.2e-05 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## Warning in map2stan(alist(log_gdp ~ dnorm(mu, sigma), mu <- a + bR * rugged + : There were 661 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.
m8m3.stan.warm2 <- map2stan(
  alist(
    log_gdp ~ dnorm( mu , sigma ) ,
    mu <- a + bR*rugged + bA*cont_africa + bAR*rugged*cont_africa ,
    a ~ dnorm(0,100),
    bR ~ dnorm(0,10),
    bA ~ dnorm(0,10),
    bAR ~ dnorm(0,10),
    sigma ~ dcauchy(0,2)
  ), data=dd.trim, warmup = 2, iter = 1002 )
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration:    1 / 1002 [  0%]  (Warmup)
## Chain 1, Iteration:    3 / 1002 [  0%]  (Sampling)
## Chain 1, Iteration:  102 / 1002 [ 10%]  (Sampling)
## Chain 1, Iteration:  202 / 1002 [ 20%]  (Sampling)
## Chain 1, Iteration:  302 / 1002 [ 30%]  (Sampling)
## Chain 1, Iteration:  402 / 1002 [ 40%]  (Sampling)
## Chain 1, Iteration:  502 / 1002 [ 50%]  (Sampling)
## Chain 1, Iteration:  602 / 1002 [ 60%]  (Sampling)
## Chain 1, Iteration:  702 / 1002 [ 70%]  (Sampling)
## Chain 1, Iteration:  802 / 1002 [ 80%]  (Sampling)
## Chain 1, Iteration:  902 / 1002 [ 90%]  (Sampling)
## Chain 1, Iteration: 1002 / 1002 [100%]  (Sampling)
##  Elapsed Time: 0.000166 seconds (Warm-up)
##                0.049102 seconds (Sampling)
##                0.049268 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'log_gdp ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 3e-06 seconds (Warm-up)
##                5.9e-05 seconds (Sampling)
##                6.2e-05 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## Warning in map2stan(alist(log_gdp ~ dnorm(mu, sigma), mu <- a + bR * rugged + : There were 1000 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.
precis(m8m3.stan.warm1000)
##        Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a      9.23   0.14       9.01       9.44   278    1
## bR    -0.21   0.08      -0.33      -0.09   294    1
## bA    -1.95   0.22      -2.33      -1.64   263    1
## bAR    0.39   0.13       0.16       0.58   266    1
## sigma  0.95   0.05       0.87       1.03   695    1
precis(m8m3.stan.warm100)
##        Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a      9.22   0.14       8.98       9.43   257    1
## bR    -0.20   0.08      -0.33      -0.08   321    1
## bA    -1.94   0.23      -2.33      -1.59   187    1
## bAR    0.39   0.13       0.18       0.58   198    1
## sigma  0.95   0.05       0.88       1.04  1000    1
precis(m8m3.stan.warm50)
##        Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a      9.31   0.64       9.02       9.46    73 1.02
## bR    -0.24   0.31      -0.32      -0.07    75 1.01
## bA    -2.10   1.13      -2.36      -1.64    69 1.02
## bAR    0.47   0.58       0.18       0.60    72 1.01
## sigma  0.99   0.30       0.86       1.03    76 1.01
precis(m8m3.stan.warm10)
## Warning in precis(m8m3.stan.warm10): There were 661 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.
##         Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a      50.36   0.52      49.70      51.19     4 1.68
## bR    -36.90   4.15     -42.03     -29.49     3 2.04
## bA    -26.93   0.57     -27.68     -25.92     7 1.61
## bAR    22.77   0.70      22.00      24.14     5 1.49
## sigma  36.56   5.82      27.39      44.81     4 1.63
precis(m8m3.stan.warm2)
## Warning in precis(m8m3.stan.warm2): There were 1000 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.
##          Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a     -109.24      0    -109.24    -109.24     1    1
## bR      -2.03      0      -2.03      -2.03     1    1
## bA      -1.57      0      -1.57      -1.57     1    1
## bAR      4.73      0       4.73       4.73     1    1
## sigma   15.20      0      15.20      15.20     1    1
plot(m8m3.stan.warm1000)
plot(m8m3.stan.warm100)

plot(m8m3.stan.warm50)

plot(m8m3.stan.warm10)

plot(m8m3.stan.warm2)

compare(m8m3.stan.warm1000,m8m3.stan.warm100,m8m3.stan.warm50,m8m3.stan.warm10,m8m3.stan.warm2)
##                       WAIC pWAIC   dWAIC weight    SE   dSE
## m8m3.stan.warm1000   468.7   4.8     0.0   0.56 14.77    NA
## m8m3.stan.warm100    469.2   5.1     0.5   0.44 14.79  0.18
## m8m3.stan.warm50     482.6  10.1    13.9   0.00 14.58  1.20
## m8m3.stan.warm10    1724.7  12.8  1256.0   0.00 34.10 34.01
## m8m3.stan.warm2    11734.6   0.0 11266.0   0.00 48.77 49.48

Things seem good with 100 warmups and even 50 but certinly not with 10 or 2; the number of effective samples drops to 1 and this is also clear from the chain traces

8H1

Run the model below and then inspect the posterior distubition and explain what it is accomplishing. Compare the samples for the parameters a and b. Can you explain the difference given what you know about the Cauchy distribution.

mp <- map2stan(
  alist(
    a ~ dnorm(0,1),
    b ~ dcauchy(0,1)
  ),
  data=list(y=1),
  start=list(a=0,b=0),
  iter=1e4, warmup=100 , WAIC=FALSE )
## 
## SAMPLING FOR MODEL 'a ~ dnorm(0, 1)' NOW (CHAIN 1).
## WARNING: The initial buffer, adaptation window, and terminal buffer
##          overflow the total number of warmup iterations.
##          Defaulting to a 15%/75%/10% partition,
##            init_buffer = 15
##            adapt_window = 75
##            term_buffer = 10
## 
## 
## Chain 1, Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 1, Iteration:  101 / 10000 [  1%]  (Sampling)
## Chain 1, Iteration: 1100 / 10000 [ 11%]  (Sampling)
## Chain 1, Iteration: 2100 / 10000 [ 21%]  (Sampling)
## Chain 1, Iteration: 3100 / 10000 [ 31%]  (Sampling)
## Chain 1, Iteration: 4100 / 10000 [ 41%]  (Sampling)
## Chain 1, Iteration: 5100 / 10000 [ 51%]  (Sampling)
## Chain 1, Iteration: 6100 / 10000 [ 61%]  (Sampling)
## Chain 1, Iteration: 7100 / 10000 [ 71%]  (Sampling)
## Chain 1, Iteration: 8100 / 10000 [ 81%]  (Sampling)
## Chain 1, Iteration: 9100 / 10000 [ 91%]  (Sampling)
## Chain 1, Iteration: 10000 / 10000 [100%]  (Sampling)
##  Elapsed Time: 0.001467 seconds (Warm-up)
##                0.144409 seconds (Sampling)
##                0.145876 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'a ~ dnorm(0, 1)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 5e-06 seconds (Warm-up)
##                2.3e-05 seconds (Sampling)
##                2.8e-05 seconds (Total)

For starters this is a wierd little setup. data is define as y=1 but this is not references in a formula. I assume that the value of y does not actually matter. So I think this is just giving us the normal and Cauchy distributions for a and b. Will check…

precis(mp)
##   Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a 0.02   1.06      -1.64       1.74  9900    1
## b 0.32  18.49      -7.58       7.71   942    1
plot(precis(mp))

These show us that a has a mean of ~0 and a SD of ~1, as expected from the normal distribution. b also has a mean of 0 but a much broader SD. This fits with Cauchy being a “thick-tailed” distribution.

Looking at the traces…

plot(mp)

We see that the Cauchy distribution has some very extreme values. This fits of it being equivalent to the ratio of random draws from two Guassian distributions. It is easy to obtain an “extreme” value.

Compare to the respective random functions.

norm.df <- melt(data.frame(posterior=extract.samples(mp)$a,
                           rnorm=rnorm(9900,0,1)))
## No id variables; using all as measure variables
## Warning: attributes are not identical across measure variables; they will
## be dropped
qplot(x=value,fill=variable,alpha=0.1,data=norm.df,geom="density",main="normal")

cauchy.df <- melt(data.frame(posterior=extract.samples(mp)$b,
                             rcauchy=rcauchy(9900,0,1)))
## No id variables; using all as measure variables
## Warning: attributes are not identical across measure variables; they will
## be dropped
qplot(x=value,fill=variable,alpha=0.1,data=cauchy.df,geom="density",main="normal")

qplot(x=value,fill=variable,alpha=0.1,data=cauchy.df,geom="density",main="cauchy") + scale_x_log10()
## Warning in self$trans$transform(x): NaNs produced
## Warning: Removed 9874 rows containing non-finite values (stat_density).

Note that the last plot is not really correct; samples near 0 were dropped due to the log transformation.

8H2

Recall the Divorce Rate data set. Repeat the analyses of models 5.1, 5.2, and 5.3, but with map2stan. Use compare to compare the fits on WAIC. Explain.

data(WaffleDivorce)
d <- WaffleDivorce
# standardize predictor
d$MedianAgeMarriage.s <- (d$MedianAgeMarriage-mean(d$MedianAgeMarriage))/
  sd(d$MedianAgeMarriage)
d$Marriage.s <- (d$Marriage - mean(d$Marriage))/sd(d$Marriage)

fit model map models

m5.1 <- map(
  alist(
    Divorce ~ dnorm( mu , sigma ) ,
    mu <- a + bA * MedianAgeMarriage.s ,
    a ~ dnorm( 10 , 10 ) ,
    bA ~ dnorm( 0 , 1 ) ,
    sigma ~ dunif( 0 , 10 )
  ) , data = d )

m5.2 <- map(
  alist(
    Divorce ~ dnorm( mu , sigma ) ,
    mu <- a + bR * Marriage.s ,
    a ~ dnorm( 10 , 10 ) ,
    bR ~ dnorm( 0 , 1 ) ,
    sigma ~ dunif( 0 , 10 )
  ) , data = d )

m5.3 <- map(
  alist(
    Divorce ~ dnorm( mu , sigma ) ,
    mu <- a + bR*Marriage.s + bA*MedianAgeMarriage.s ,
    a ~ dnorm( 10 , 10 ) ,
    bR ~ dnorm( 0 , 1 ) ,
    bA ~ dnorm( 0 , 1 ) ,
    sigma ~ dunif( 0 , 10 )
  ),
  data = d )

fit stan models

m5.1.stan <- map2stan(
  alist(
    Divorce ~ dnorm( mu , sigma ) ,
    mu <- a + bA * MedianAgeMarriage.s ,
    a ~ dnorm( 10 , 10 ) ,
    bA ~ dnorm( 0 , 1 ) ,
    sigma ~ dunif( 0 , 10 )
  ) , data = d, chains=4 )
## Warning: Variable 'Marriage.SE' contains dots '.'.
## Will attempt to remove dots internally.
## Warning: Variable 'Divorce.SE' contains dots '.'.
## Will attempt to remove dots internally.
## Warning: Variable 'MedianAgeMarriage.s' contains dots '.'.
## Will attempt to remove dots internally.
## Warning: Variable 'Marriage.s' contains dots '.'.
## Will attempt to remove dots internally.
## Warning in FUN(X[[i]], ...): data with name Location is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name Loc is not numeric and not used
## 
## SAMPLING FOR MODEL 'Divorce ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.022578 seconds (Warm-up)
##                0.023791 seconds (Sampling)
##                0.046369 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'Divorce ~ dnorm(mu, sigma)' NOW (CHAIN 2).
## 
## Chain 2, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 2, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 2, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 2, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 2, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 2, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 2, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 2, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 2, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 2, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 2, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.025524 seconds (Warm-up)
##                0.020544 seconds (Sampling)
##                0.046068 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'Divorce ~ dnorm(mu, sigma)' NOW (CHAIN 3).
## 
## Chain 3, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 3, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 3, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 3, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 3, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 3, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 3, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 3, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 3, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 3, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 3, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 3, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.022011 seconds (Warm-up)
##                0.020461 seconds (Sampling)
##                0.042472 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'Divorce ~ dnorm(mu, sigma)' NOW (CHAIN 4).
## 
## Chain 4, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 4, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 4, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 4, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 4, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 4, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 4, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 4, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 4, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 4, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 4, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 4, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.023698 seconds (Warm-up)
##                0.019879 seconds (Sampling)
##                0.043577 seconds (Total)
## Warning in FUN(X[[i]], ...): data with name Location is not numeric and not
## used

## Warning in FUN(X[[i]], ...): data with name Loc is not numeric and not used
## 
## SAMPLING FOR MODEL 'Divorce ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 4e-06 seconds (Warm-up)
##                2.7e-05 seconds (Sampling)
##                3.1e-05 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 400 / 4000 ]
[ 800 / 4000 ]
[ 1200 / 4000 ]
[ 1600 / 4000 ]
[ 2000 / 4000 ]
[ 2400 / 4000 ]
[ 2800 / 4000 ]
[ 3200 / 4000 ]
[ 3600 / 4000 ]
[ 4000 / 4000 ]
m5.2.stan <- map2stan(
  alist(
    Divorce ~ dnorm( mu , sigma ) ,
    mu <- a + bR * Marriage.s ,
    a ~ dnorm( 10 , 10 ) ,
    bR ~ dnorm( 0 , 1 ) ,
    sigma ~ dunif( 0 , 10 )
  ) , data = d, chains=4 )
## Warning: Variable 'Marriage.SE' contains dots '.'.
## Will attempt to remove dots internally.
## Warning: Variable 'Divorce.SE' contains dots '.'.
## Will attempt to remove dots internally.
## Warning: Variable 'MedianAgeMarriage.s' contains dots '.'.
## Will attempt to remove dots internally.
## Warning: Variable 'Marriage.s' contains dots '.'.
## Will attempt to remove dots internally.
## Warning in FUN(X[[i]], ...): data with name Location is not numeric and not
## used

## Warning in FUN(X[[i]], ...): data with name Loc is not numeric and not used
## 
## SAMPLING FOR MODEL 'Divorce ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.022576 seconds (Warm-up)
##                0.019082 seconds (Sampling)
##                0.041658 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'Divorce ~ dnorm(mu, sigma)' NOW (CHAIN 2).
## 
## Chain 2, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 2, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 2, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 2, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 2, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 2, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 2, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 2, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 2, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 2, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 2, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.022757 seconds (Warm-up)
##                0.022111 seconds (Sampling)
##                0.044868 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'Divorce ~ dnorm(mu, sigma)' NOW (CHAIN 3).
## 
## Chain 3, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 3, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 3, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 3, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 3, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 3, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 3, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 3, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 3, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 3, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 3, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 3, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.02318 seconds (Warm-up)
##                0.021248 seconds (Sampling)
##                0.044428 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'Divorce ~ dnorm(mu, sigma)' NOW (CHAIN 4).
## 
## Chain 4, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 4, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 4, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 4, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 4, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 4, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 4, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 4, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 4, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 4, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 4, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 4, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.024198 seconds (Warm-up)
##                0.021495 seconds (Sampling)
##                0.045693 seconds (Total)
## Warning in FUN(X[[i]], ...): data with name Location is not numeric and not
## used

## Warning in FUN(X[[i]], ...): data with name Loc is not numeric and not used
## 
## SAMPLING FOR MODEL 'Divorce ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 4e-06 seconds (Warm-up)
##                2.7e-05 seconds (Sampling)
##                3.1e-05 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 400 / 4000 ]
[ 800 / 4000 ]
[ 1200 / 4000 ]
[ 1600 / 4000 ]
[ 2000 / 4000 ]
[ 2400 / 4000 ]
[ 2800 / 4000 ]
[ 3200 / 4000 ]
[ 3600 / 4000 ]
[ 4000 / 4000 ]
m5.3.stan <- map2stan(
  alist(
    Divorce ~ dnorm( mu , sigma ) ,
    mu <- a + bR*Marriage.s + bA*MedianAgeMarriage.s ,
    a ~ dnorm( 10 , 10 ) ,
    bR ~ dnorm( 0 , 1 ) ,
    bA ~ dnorm( 0 , 1 ) ,
    sigma ~ dunif( 0 , 10 )
  ),
  data = d,chains=4 )
## Warning: Variable 'Marriage.SE' contains dots '.'.
## Will attempt to remove dots internally.
## Warning: Variable 'Divorce.SE' contains dots '.'.
## Will attempt to remove dots internally.
## Warning: Variable 'MedianAgeMarriage.s' contains dots '.'.
## Will attempt to remove dots internally.
## Warning: Variable 'Marriage.s' contains dots '.'.
## Will attempt to remove dots internally.
## Warning in FUN(X[[i]], ...): data with name Location is not numeric and not
## used

## Warning in FUN(X[[i]], ...): data with name Loc is not numeric and not used
## 
## SAMPLING FOR MODEL 'Divorce ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.037083 seconds (Warm-up)
##                0.040403 seconds (Sampling)
##                0.077486 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'Divorce ~ dnorm(mu, sigma)' NOW (CHAIN 2).
## 
## Chain 2, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 2, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 2, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 2, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 2, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 2, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 2, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 2, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 2, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 2, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 2, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.034869 seconds (Warm-up)
##                0.029533 seconds (Sampling)
##                0.064402 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'Divorce ~ dnorm(mu, sigma)' NOW (CHAIN 3).
## 
## Chain 3, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 3, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 3, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 3, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 3, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 3, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 3, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 3, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 3, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 3, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 3, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 3, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.03413 seconds (Warm-up)
##                0.033299 seconds (Sampling)
##                0.067429 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'Divorce ~ dnorm(mu, sigma)' NOW (CHAIN 4).
## 
## Chain 4, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 4, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 4, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 4, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 4, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 4, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 4, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 4, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 4, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 4, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 4, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 4, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.038471 seconds (Warm-up)
##                0.037274 seconds (Sampling)
##                0.075745 seconds (Total)
## Warning in FUN(X[[i]], ...): data with name Location is not numeric and not
## used

## Warning in FUN(X[[i]], ...): data with name Loc is not numeric and not used
## 
## SAMPLING FOR MODEL 'Divorce ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 3e-06 seconds (Warm-up)
##                3.1e-05 seconds (Sampling)
##                3.4e-05 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 400 / 4000 ]
[ 800 / 4000 ]
[ 1200 / 4000 ]
[ 1600 / 4000 ]
[ 2000 / 4000 ]
[ 2400 / 4000 ]
[ 2800 / 4000 ]
[ 3200 / 4000 ]
[ 3600 / 4000 ]
[ 4000 / 4000 ]

Not clear if the problem asks us to compare from stan to map or to compare within each engine type. Do both.

map vs stan

compare(m5.1,m5.1.stan)
## Warning in compare(m5.1, m5.1.stan): Not all model fits of same class.
## This is usually a bad idea, because it implies they were fit by different algorithms.
## Check yourself, before you wreck yourself.
##            WAIC pWAIC dWAIC weight    SE  dSE
## m5.1.stan 186.6   4.0   0.0   0.61 12.38   NA
## m5.1      187.5   4.7   0.9   0.39 14.39 2.06
compare(m5.2,m5.2.stan)
## Warning in compare(m5.2, m5.2.stan): Not all model fits of same class.
## This is usually a bad idea, because it implies they were fit by different algorithms.
## Check yourself, before you wreck yourself.
##            WAIC pWAIC dWAIC weight    SE  dSE
## m5.2.stan 200.3   3.3   0.0   0.58  9.56   NA
## m5.2      200.9   3.9   0.7   0.42 10.83 1.29
compare(m5.3,m5.3.stan)
## Warning in compare(m5.3, m5.3.stan): Not all model fits of same class.
## This is usually a bad idea, because it implies they were fit by different algorithms.
## Check yourself, before you wreck yourself.
##            WAIC pWAIC dWAIC weight    SE  dSE
## m5.3.stan 187.9   4.8   0.0   0.72 12.44   NA
## m5.3      189.9   6.1   1.9   0.28 14.52 2.13

different models

compare(m5.1,m5.2,m5.3)
##       WAIC pWAIC dWAIC weight    SE   dSE
## m5.1 187.9   4.9   0.0   0.66 14.55    NA
## m5.3 189.3   5.9   1.4   0.33 14.47  0.95
## m5.2 200.4   3.7  12.5   0.00 10.78 10.75
compare(m5.1.stan,m5.2.stan,m5.3.stan)
##            WAIC pWAIC dWAIC weight    SE  dSE
## m5.1.stan 186.6   4.0   0.0   0.66 12.38   NA
## m5.3.stan 187.9   4.8   1.3   0.34 12.44 0.78
## m5.2.stan 200.3   3.3  13.7   0.00  9.56 9.07

Either way the fits and conclusions are pretty comparable. Is this what the point was?

8H3

Sometimes changes a prior for one parameter has unanticipated effects on other parameters. This can occur when the parameters are highly correlated in the posterior. Work and think through the following example.

Simulate leg length and height for 100 individuals

N <- 100
height <- rnorm(N,10,2)
leg_prop <- runif(N,0.4,0.5)
leg_left <- leg_prop*height +
  rnorm( N , 0 , 0.02 )
leg_right <- leg_prop*height +
  rnorm( N , 0 , 0.02 )
d <- data.frame(height,leg_left,leg_right)

Original priors, now fit with Stan

m5.8s <- map2stan(
  alist(
    height ~ dnorm( mu , sigma ) ,
    mu <- a + bl*leg_left + br*leg_right ,
    a ~ dnorm( 10 , 100 ) ,
    bl ~ dnorm( 2 , 10 ) ,
    br ~ dnorm( 2 , 10 ) ,
    sigma ~ dcauchy( 0 , 1 )
  ),
  data=d, chains=4, start=list(a=10,bl=0,br=0,sigma=1) )
## 
## SAMPLING FOR MODEL 'height ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 3.26275 seconds (Warm-up)
##                3.12519 seconds (Sampling)
##                6.38794 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'height ~ dnorm(mu, sigma)' NOW (CHAIN 2).
## 
## Chain 2, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 2, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 2, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 2, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 2, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 2, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 2, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 2, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 2, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 2, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 2, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 2.99028 seconds (Warm-up)
##                3.65859 seconds (Sampling)
##                6.64887 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'height ~ dnorm(mu, sigma)' NOW (CHAIN 3).
## 
## Chain 3, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 3, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 3, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 3, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 3, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 3, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 3, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 3, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 3, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 3, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 3, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 3, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 2.99187 seconds (Warm-up)
##                3.38758 seconds (Sampling)
##                6.37945 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'height ~ dnorm(mu, sigma)' NOW (CHAIN 4).
## 
## Chain 4, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 4, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 4, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 4, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 4, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 4, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 4, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 4, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 4, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 4, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 4, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 4, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 3.02447 seconds (Warm-up)
##                3.83954 seconds (Sampling)
##                6.86401 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'height ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 3e-06 seconds (Warm-up)
##                3.8e-05 seconds (Sampling)
##                4.1e-05 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 400 / 4000 ]
[ 800 / 4000 ]
[ 1200 / 4000 ]
[ 1600 / 4000 ]
[ 2000 / 4000 ]
[ 2400 / 4000 ]
[ 2800 / 4000 ]
[ 3200 / 4000 ]
[ 3600 / 4000 ]
[ 4000 / 4000 ]

Change prior for bR so that it is alway positive:

m5.8s2 <- map2stan(
  alist(
    height ~ dnorm( mu , sigma ) ,
    mu <- a + bl*leg_left + br*leg_right ,
    a ~ dnorm( 10 , 100 ) ,
    bl ~ dnorm( 2 , 10 ) ,
    br ~ dnorm( 2 , 10 ) & T[0,] ,
    sigma ~ dcauchy( 0 , 1 )
  ),
  data=d, chains=4, start=list(a=10,bl=0,br=0,sigma=1) )
## 
## SAMPLING FOR MODEL 'height ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.705203 seconds (Warm-up)
##                0.916939 seconds (Sampling)
##                1.62214 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'height ~ dnorm(mu, sigma)' NOW (CHAIN 2).
## 
## Chain 2, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 2, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 2, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 2, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 2, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 2, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 2, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 2, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 2, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 2, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 2, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.876934 seconds (Warm-up)
##                0.904471 seconds (Sampling)
##                1.7814 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'height ~ dnorm(mu, sigma)' NOW (CHAIN 3).
## 
## Chain 3, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 3, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 3, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 3, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 3, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 3, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 3, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 3, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 3, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 3, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 3, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 3, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.826935 seconds (Warm-up)
##                0.706843 seconds (Sampling)
##                1.53378 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'height ~ dnorm(mu, sigma)' NOW (CHAIN 4).
## 
## Chain 4, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 4, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 4, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 4, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 4, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 4, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 4, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 4, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 4, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 4, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 4, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 4, Iteration: 2000 / 2000 [100%]  (Sampling)
##  Elapsed Time: 0.700543 seconds (Warm-up)
##                0.788925 seconds (Sampling)
##                1.48947 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'height ~ dnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 4e-06 seconds (Warm-up)
##                3.7e-05 seconds (Sampling)
##                4.1e-05 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 400 / 4000 ]
[ 800 / 4000 ]
[ 1200 / 4000 ]
[ 1600 / 4000 ]
[ 2000 / 4000 ]
[ 2400 / 4000 ]
[ 2800 / 4000 ]
[ 3200 / 4000 ]
[ 3600 / 4000 ]
[ 4000 / 4000 ]
## Warning in map2stan(alist(height ~ dnorm(mu, sigma), mu <- a + bl * leg_left + : There were 2157 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.

A bunch of warnings for m5.8s2…

Take a look

precis(m5.8s)
##        Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a      0.77   0.28       0.29       1.19  1775    1
## bl     5.53   2.07       2.29       8.69  1052    1
## br    -3.49   2.06      -6.60      -0.19  1050    1
## sigma  0.58   0.04       0.51       0.64  1658    1
precis(m5.8s2)
## Warning in precis(m5.8s2): There were 2157 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.
##       Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a     0.80   0.29       0.36       1.30   292 1.02
## bl    1.24   0.77       0.21       2.12   245 1.02
## br    0.79   0.75       0.00       1.77   248 1.02
## sigma 0.59   0.04       0.52       0.66   339 1.00
op <- par(mfrow=c(1,2))
plot(precis(m5.8s))
plot(precis(m5.8s2))
## Warning in precis(m5.8s2): There were 2157 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.

par(op)

The paramter estimates and SD are really quite similar. Of course bR remains positive on the right. I suspect that the sampling problem in the second version arises when bl is proposed to be positive, then br can not be negative.

I also do not understand why, in the first model, bl is mostly negative.

more diagnostics

plot(m5.8s)
plot(m5.8s2)

Here it is more clear that the fits are behaving differently, with the posterior sampleing bR and bL differently in the second version.

pairs(m5.8s)

pairs(m5.8s2)

This shows that the posterior for bl and br are skewed in the second fit but not in the first.

Comparing the distributions…

post.df <- melt(data.frame(bl.m1=extract.samples(m5.8s)$bl,
                           br.m1=extract.samples(m5.8s)$br,
                           bl.m2=extract.samples(m5.8s2)$bl,
                           br.m2=extract.samples(m5.8s2)$br))
## No id variables; using all as measure variables
post.df$model=substr(post.df$variable,4,5)
post.df$parameter=substr(post.df$variable,1,2)
pl <- ggplot(post.df,aes(x=value,fill=model))
pl <- pl + geom_density(alpha=0.2)
pl <- pl + facet_wrap(~ parameter,ncol=2)
pl

8H4

Use DIC or WAIC to compare the effective parameters.

compare(m5.8s,m5.8s2)
##         WAIC pWAIC dWAIC weight    SE dSE
## m5.8s  176.7   3.6   0.0   0.85 10.95  NA
## m5.8s2 180.1   2.9   3.4   0.15 11.63   4
compare(m5.8s,m5.8s2,func=DIC)
##          DIC  pD dDIC weight
## m5.8s  176.8 3.9  0.0   0.85
## m5.8s2 180.3 3.2  3.5   0.15

By both WAIC and DIC the model with the constrained bR has a somewhat smaller number of parameters. Would we really expect this to be the case? In both cases they are completely correlated so should only really be 1 parameter. But it muse be that since now they are constrained they are sampling yet less space.

8H5

Modify the Metropolis code so that it works even it labels do not corredpond to population sizes

First version

num_weeks <- 1e5
pop.sizes <- sample(c(1:10)*100,replace = FALSE)
positions <- rep(0,num_weeks)
current <- 10
for ( i in 1:num_weeks ) {
  # record current position
  positions[i] <- current
  # flip coin to generate proposal
  proposal <- current + sample( c(-1,1) , size=1 )
  # now make sure he loops around the archipelago
  if ( proposal < 1 ) proposal <- 10
  if ( proposal > 10 ) proposal <- 1
  # move?
  prob_move <- pop.sizes[proposal]/pop.sizes[current]
  current <- ifelse( runif(1) < prob_move , proposal , current )
}
visits <- as.vector(table(positions))
names(visits) <- pop.sizes
visits <- visits[order(as.numeric(names(visits)))]
barplot(visits)

Works!

It would be nice to have a version where the island names are arbitrary

Second version

num_weeks <- 1e5

islands <- data.frame(
  name=LETTERS[1:10],
  pop.size=sample(c(1:10)*100,replace = FALSE),stringsAsFactors = FALSE)

positions <- rep(0,num_weeks)
current <- 10
for ( i in 1:num_weeks ) {
  # record current position
  positions[i] <- islands$name[current]
  # flip coin to generate proposal
  proposal <- current + sample( c(-1,1) , size=1 )
  # now make sure he loops around the archipelago
  if ( proposal < 1 ) proposal <- 10
  if ( proposal > 10 ) proposal <- 1
  # move?
  prob_move <- islands$pop.size[proposal]/islands$pop.size[current]
  current <- ifelse( runif(1) < prob_move , proposal , current )
}
visits <- table(positions)
names(visits) <- paste(names(visits),islands$pop.size)
visits <- as.data.frame(visits[order(islands$pop.size)])
names(visits) <- c("name","visits")
ggplot(visits,aes(x=name,y=visits)) + geom_bar(stat="identity",fill="skyblue")

Works!

8H6

Modify the Metropolis code to write your own MCMC estimate for the globe tossing data and model from Chapter 2.

Lets think about this. We have one parameter that we are trying to estimate, the proportion of water on the globe. We can propose different values of this parameter from 0 to 1. The probability of accepting the proposal corresponds to the likelihood of the new proposal compared to the likelihood of the current parameter.

I wonder if my proposals can come from runif or if they should come from a grid? One way to find out…

tosses <- c("W","L","W","W","W","L","W","L","W")
water <- sum(tosses=="W")
land <- sum(tosses=="L")

chain.length <- 100000

posterior <- matrix(NA,nrow=chain.length,ncol = 2)

colnames(posterior) <- c("p","log.lik")

p <- runif(1,0,1)

posterior[1,] <- c(p,dbinom(water,water+land,prob=p,log=TRUE))

for(i in 2:chain.length) {
  proposal <- runif(1,0,1)
  log.lik <- dbinom(water,water+land,prob=proposal,log=TRUE)
  prob.move <- exp(log.lik - posterior[i-1,"log.lik"])
  if (runif(1,0,1) < prob.move) {
    posterior[i,] <- c(proposal,log.lik)
  } else {
    posterior[i,] <- posterior[i-1,]
  }
}

posterior <- as.data.frame(posterior)

plot(posterior$p,type="l")

bin <- data.frame(x=seq(0,1,length.out = 100),y=dbinom(water,water+land,seq(0,1,length.out = 100)))
bin$y <- bin$y/max(bin$y)

pl <- ggplot(posterior,aes(x=p,y=..scaled..))
pl <- pl + geom_density(fill="blue",alpha=.1)
pl + geom_line(aes(x=x,y=y),data=bin,lty=2)

precis(posterior$p)
##       Mean StdDev |0.89 0.89|
## model 0.64   0.14  0.42  0.87

alternative, using a grid of possible p values

parameter.table <- data.frame(
  p = seq(0,1,length.out = 100)) #note: if the steps are small then it takes a LOT of time to sample well.
parameter.table$log.lik <- dbinom(water,water+land,prob=parameter.table$p,log=TRUE)

chain.length <- 100000

posterior <- matrix(NA,nrow=chain.length,ncol = 2)

colnames(posterior) <- c("p","log.lik")

current <- sample(1:nrow(parameter.table),size = 1)

for (i in 1:chain.length) {
  posterior[i,] <- as.matrix(parameter.table[current,])
  proposal <- current + sample( c(-1,1) , size=1 )
  # now make sure we loop around
  if ( proposal < 1 ) proposal <- nrow(parameter.table)
  if ( proposal > nrow(parameter.table) ) proposal <- 1
  # move?
  prob.move <- exp(parameter.table$log.lik[proposal] - parameter.table$log.lik[current])
  #print(paste("current",current,"proposal",proposal,"prob.move",prob.move,sep=": "))
  current <- ifelse( runif(1) < prob.move , proposal , current ) 
  #print(paste("new current",current))
}

posterior <- as.data.frame(posterior)

plot(posterior$p,type="l")

bin <- data.frame(x=seq(0,1,length.out = 100),y=dbinom(water,water+land,seq(0,1,length.out = 100)))
bin$y <- bin$y/max(bin$y)

pl <- ggplot(posterior,aes(x=p,y=..scaled..))
pl <- pl + geom_density(fill="blue",alpha=.1)
pl + geom_line(aes(x=x,y=y),data=bin,lty=2)

precis(posterior$p)
##       Mean StdDev |0.89 0.89|
## model 0.64   0.13  0.44  0.85